package utils;

import org.ansj.domain.Term;
import org.ansj.splitWord.analysis.ToAnalysis;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;

/**
 * Tokenizes text with the ansj analyzer and keeps only the terms whose
 * nature string belongs to a configured set.
 *
 * Created by 梅晨 on 2017/7/21.
 */
public class SplitWord {
    /** Nature strings (as reported by {@code Term.getNatureStr()}) of the terms to keep. */
    HashSet<String> wordTypes;

    /**
     * Creates a splitter that keeps only terms of the given natures.
     *
     * @param wordTypes nature strings to accept; the set is stored by
     *                  reference, not copied
     */
    public SplitWord(HashSet<String> wordTypes){
        this.wordTypes = wordTypes;
    }

    /**
     * Segments the text with the ansj tokenizer and returns the names of the
     * terms whose nature string is contained in {@code wordTypes}.
     *
     * @param content text to segment; may be {@code null} or empty
     * @return a new mutable list with the accepted term names in the order
     *         the tokenizer produced them; empty when {@code content} is
     *         {@code null} or empty
     */
    public List<String> getSplitWords(String content){
        List<String> wordList = new ArrayList<String>();
        // Nothing to tokenize: skip the analyzer rather than hand it null/empty input.
        if(content == null || content.isEmpty()){
            return wordList;
        }
        List<Term> allTerms = ToAnalysis.parse(content).getTerms();
        for(Term term : allTerms){
            // Filter on the term's nature string; only whitelisted natures are kept.
            if(wordTypes.contains(term.getNatureStr())){
                wordList.add(term.getName());
            }
        }
        return wordList;
    }
}
